In [ ]:
import sys
IN_COLAB = "google.colab" in sys.modules
if IN_COLAB:
    !pip install --quiet scvi-colab
    from scvi_colab import install
    install()
    !pip install --quiet git+https://github.com/BayraktarLab/cell2location#egg=cell2location[tutorials]
In [ ]:
import scanpy as sc
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

import cell2location

from matplotlib import rcParams
rcParams['pdf.fonttype'] = 42 # enables correct plotting of text for PDFs
/home/biogenger/miniconda3/envs/c2l/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
/home/biogenger/miniconda3/envs/c2l/lib/python3.9/site-packages/scvi/_settings.py:63: UserWarning: Since v1.0.0, scvi-tools no longer uses a random seed by default. Run `scvi.settings.seed = 0` to reproduce results from previous versions.
  self.seed = seed
/home/biogenger/miniconda3/envs/c2l/lib/python3.9/site-packages/scvi/_settings.py:70: UserWarning: Setting `dl_pin_memory_gpu_training` is deprecated in v1.0 and will be removed in v1.1. Please pass in `pin_memory` to the data loaders instead.
  self.dl_pin_memory_gpu_training = (
In [ ]:
### setting ###
st_sample = "E12.5_slice13"
sc_sample = "sc_E12"
out_path = "/media/biogenger/D/Projects/CMY/Analysis/mouse_lung/Overall/Fig2/Deconvolution/Cell2Location/result"
In [ ]:
# read the seurat object
tmp = "/media/server/data/LJJ/Cell2Location/srat.merge.annotated.h5ad"
adata_st = sc.read_h5ad(tmp)
adata_st.X = adata_st.raw.X.copy()
del adata_st.raw
In [ ]:
# find mitochondria-encoded (MT) genes
adata_st.var["useless_gene"] = [gene.startswith("mt-") for gene in adata_st.var.index] or [gene.startswith("Hba-") for gene in adata_st.var.index] or [gene.startswith("Hbb-") for gene in adata_st.var.index]

# remove MT genes for spatial mapping (keeping their counts in the object)
adata_st.obsm["useless_gene"] = adata_st[:, adata_st.var["useless_gene"].values].X.toarray()
adata_st = adata_st[:, ~adata_st.var["useless_gene"].values]
In [ ]:
# read the seurat object
tmp = "/media/server/data/LJJ/Cell2Location/"+sc_sample+"/h5ad/srat.merge.annotated.h5ad"
adata_sc = sc.read_h5ad(tmp)
adata_sc.X = adata_sc.raw.X.copy()
del adata_sc.raw
In [ ]:
from cell2location.utils.filtering import filter_genes
selected = filter_genes(adata_sc, cell_count_cutoff=5, cell_percentage_cutoff2=0.05, nonz_mean_cutoff=1.1)
/home/server/miniconda3/envs/c2l/lib/python3.9/site-packages/pandas/core/arraylike.py:396: RuntimeWarning: divide by zero encountered in log10
  result = getattr(ufunc, method)(*inputs, **kwargs)
In [ ]:
# filter the object
adata_sc = adata_sc[:, selected].copy()
In [ ]:
# prepare anndata for the regression model
cell2location.models.RegressionModel.setup_anndata(adata=adata_sc, labels_key="split_reanno_ctp")
/home/server/miniconda3/envs/c2l/lib/python3.9/abc.py:119: FutureWarning: SparseDataset is deprecated and will be removed in late 2024. It has been replaced by the public classes CSRDataset and CSCDataset.

For instance checks, use `isinstance(X, (anndata.experimental.CSRDataset, anndata.experimental.CSCDataset))` instead.

For creation, use `anndata.experimental.sparse_dataset(X)` instead.

  return _abc_instancecheck(cls, instance)
An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.
In [ ]:
# create the regression model
from cell2location.models import RegressionModel
mod = RegressionModel(adata_sc)

# view anndata_setup as a sanity check
mod.view_anndata_setup()
Anndata setup with scvi-tools version 1.0.4.


Setup via `RegressionModel.setup_anndata` with arguments:
{
'layer': None,
'batch_key': None,
'labels_key': 'split_reanno_ctp',
'categorical_covariate_keys': None,
'continuous_covariate_keys': None
}


         Summary Statistics         
┏━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┓
┃     Summary Stat Key      Value ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━┩
│         n_batch             1   │
│         n_cells           3958  │
│ n_extra_categorical_covs    0   │
│ n_extra_continuous_covs     0   │
│         n_labels           15   │
│          n_vars           12624 │
└──────────────────────────┴───────┘
               Data Registry                
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Registry Key     scvi-tools Location    ┃
┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│      X                 adata.X          │
│    batch      adata.obs['_scvi_batch']  │
│    ind_x        adata.obs['_indices']   │
│    labels     adata.obs['_scvi_labels'] │
└──────────────┴───────────────────────────┘
                     batch State Registry                      
┏━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
┃     Source Location       Categories  scvi-tools Encoding ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
│ adata.obs['_scvi_batch']      0                0          │
└──────────────────────────┴────────────┴─────────────────────┘
                             labels State Registry                              
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
┃        Source Location               Categories        scvi-tools Encoding ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
│ adata.obs['split_reanno_ctp']           ASMC                    0          │
│                                    Adventitial FB               1          │
│                                    Arterial maEC                2          │
│                                    Cardiomyocyte                3          │
│                                     Chondrocyte                 4          │
│                                      Lymphatic                  5          │
│                                     Mesothelium                 6          │
│                                    Myofibroblast                7          │
│                                    Neuroendocrine               8          │
│                                     Prolif. gCap                9          │
│                                Sox2+ Early Epithelium          10          │
│                                Sox9+ Early Epithelium          11          │
│                                       Wnt2+ FB                 12          │
│                                         gCap                   13          │
│                                   prolif. Wnt2+ FB             14          │
└───────────────────────────────┴────────────────────────┴─────────────────────┘
In [ ]:
mod.train(max_epochs=250, batch_size=2500, train_size=1, use_gpu=True)
/home/server/miniconda3/envs/c2l/lib/python3.9/site-packages/scvi/train/_trainrunner.py:76: UserWarning: `use_gpu` is deprecated in v1.0 and will be removed in v1.1. Please use `accelerator` and `devices` instead.
  accelerator, lightning_devices, device = parse_device_args(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/server/miniconda3/envs/c2l/lib/python3.9/site-packages/lightning/pytorch/trainer/configuration_validator.py:69: You passed in a `val_dataloader` but have no `validation_step`. Skipping val loop.
You are using a CUDA device ('NVIDIA GeForce RTX 4080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/server/miniconda3/envs/c2l/lib/python3.9/site-packages/lightning/pytorch/loops/fit_loop.py:281: The number of training batches (2) is smaller than the logging interval Trainer(log_every_n_steps=10). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
/home/server/miniconda3/envs/c2l/lib/python3.9/abc.py:119: FutureWarning: SparseDataset is deprecated and will be removed in late 2024. It has been replaced by the public classes CSRDataset and CSCDataset.

For instance checks, use `isinstance(X, (anndata.experimental.CSRDataset, anndata.experimental.CSCDataset))` instead.

For creation, use `anndata.experimental.sparse_dataset(X)` instead.

  return _abc_instancecheck(cls, instance)
Epoch 250/250: 100%|█| 250/250 [01:21<00:00,  3.10it/s, v_num=1, elbo_train=4.64
`Trainer.fit` stopped: `max_epochs=250` reached.
Epoch 250/250: 100%|█| 250/250 [01:21<00:00,  3.06it/s, v_num=1, elbo_train=4.64
In [ ]:
mod.plot_history(20)
In [ ]:
# In this section, we export the estimated cell abundance (summary of the posterior distribution).
adata_sc = mod.export_posterior(adata_sc, sample_kwargs={'num_samples': 1000, 'batch_size': 2500, 'use_gpu': True})
# adata_sc = mod.export_posterior(adata_sc, use_quantiles=True, add_to_varm=["means", "stds", "q05","q50", "q95", "q0001"], sample_kwargs={'batch_size': 2500, 'use_gpu': True})

import os
os.makedirs(os.path.join(out_path, st_sample, "reference_signatures"), )

# Save anndata object with results
adata_sc.write(os.path.join(out_path, st_sample, "reference_signatures", "sc.h5ad"))
# adata_sc = sc.read_h5ad(os.path.join(out_path, st_sample, "reference_signatures", "sc.h5ad"))

# Save model
mod.save(os.path.join(out_path, st_sample, "reference_signatures"), overwrite=True)
# mod = cell2location.models.RegressionModel.load(os.path.join(out_path, st_sample, "reference_signatures"), adata_sc)
/home/server/miniconda3/envs/c2l/lib/python3.9/site-packages/scvi/model/base/_pyromixin.py:388: UserWarning: `use_gpu` is deprecated in v1.0 and will be removed in v1.1. Please use `accelerator` and `devices` instead.
  _, _, device = parse_device_args(
Sampling local variables, batch:   0%|                    | 0/2 [00:00<?, ?it/s]
Sampling global variables, sample: 100%|█████| 999/999 [00:07<00:00, 130.27it/s]
In [ ]:
mod.plot_QC()
In [ ]:
# export estimated expression in each cluster
if 'means_per_cluster_mu_fg' in adata_sc.varm.keys():
    inf_aver = adata_sc.varm['means_per_cluster_mu_fg'][[f'means_per_cluster_mu_fg_{i}' for i in adata_sc.uns['mod']['factor_names']]].copy()
else:
    inf_aver = adata_sc.var[[f'means_per_cluster_mu_fg_{i}' for i in adata_sc.uns['mod']['factor_names']]].copy()
inf_aver.columns = adata_sc.uns['mod']['factor_names']
inf_aver.iloc[0:5, 0:5]
Out[ ]:
ASMC Adventitial FB Arterial maEC Cardiomyocyte Chondrocyte
Sox17 0.010591 0.002652 0.019115 0.000280 0.000279
Mrpl15 1.070708 1.314019 1.134181 4.892347 1.345393
Lypla1 0.583826 0.532750 0.524988 1.328794 0.604647
Tcea1 2.143165 2.459221 2.650915 5.109345 2.426383
Atp6v1h 0.474455 0.410474 0.385250 0.916307 0.364681
In [ ]:
##################################
# Cell2location: spatial mapping #
##################################
In [ ]:
adata_vis = adata_st[adata_st.obs["orig.ident"].isin([st_sample])].copy()
In [ ]:
# find shared genes and subset both anndata and reference signatures
intersect = np.intersect1d(adata_vis.var_names, inf_aver.index)
adata_vis = adata_vis[:, intersect].copy()
inf_aver = inf_aver.loc[intersect, :].copy()

# prepare anndata for cell2location model
cell2location.models.Cell2location.setup_anndata(adata=adata_vis, labels_key="split_ctp")
/home/server/miniconda3/envs/c2l/lib/python3.9/site-packages/scvi/data/fields/_dataframe_field.py:189: UserWarning: Category 3 in adata.obs['_scvi_labels'] has fewer than 3 cells. Models may not train properly.
  categorical_mapping = _make_column_categorical(
In [ ]:
# create and train the model
mod = cell2location.models.Cell2location(
    adata_vis, cell_state_df=inf_aver,
    # the expected average cell abundance: tissue-dependent
    # hyper-prior which can be estimated from paired histology:
    N_cells_per_location=4,
    # hyperparameter controlling normalisation of
    # within-experiment variation in RNA detection:
    detection_alpha=20
)
mod.view_anndata_setup()
Anndata setup with scvi-tools version 1.0.4.


Setup via `Cell2location.setup_anndata` with arguments:
{
'layer': None,
'batch_key': None,
'labels_key': 'split_ctp',
'categorical_covariate_keys': None,
'continuous_covariate_keys': None
}


         Summary Statistics         
┏━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┓
┃     Summary Stat Key      Value ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━┩
│         n_batch             1   │
│         n_cells           1867  │
│ n_extra_categorical_covs    0   │
│ n_extra_continuous_covs     0   │
│         n_labels           24   │
│          n_vars           12201 │
└──────────────────────────┴───────┘
               Data Registry                
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Registry Key     scvi-tools Location    ┃
┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│      X                 adata.X          │
│    batch      adata.obs['_scvi_batch']  │
│    ind_x        adata.obs['_indices']   │
│    labels     adata.obs['_scvi_labels'] │
└──────────────┴───────────────────────────┘
                     batch State Registry                      
┏━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
┃     Source Location       Categories  scvi-tools Encoding ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
│ adata.obs['_scvi_batch']      0                0          │
└──────────────────────────┴────────────┴─────────────────────┘
                       labels State Registry                       
┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
┃    Source Location         Categories     scvi-tools Encoding ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
│ adata.obs['split_ctp']        AEC                  0          │
│                               ASMC                 1          │
│                             ASMC.pre               2          │
│                             AT1.pre                3          │
│                             AT2.pre                4          │
│                         Alveolar.Mixture           5          │
│                           Chondrocyte              6          │
│                          Ciliated.like             7          │
│                              EC.pro                8          │
│                            Il31ra.FB               9          │
│                               LEC                 10          │
│                            Matrix.FB              11          │
│                           Mesothelium             12          │
│                             Myocyte               13          │
│                               PMP                 14          │
│                            PNEC.like              15          │
│                          Secretory.like           16          │
│                             Sox2.pro              17          │
│                             Sox9.pro              18          │
│                               VEC                 19          │
│                               VSMC                20          │
│                             VSMC.pre              21          │
│                          adventitial.FB           22          │
│                              myo.FB               23          │
└────────────────────────┴──────────────────┴─────────────────────┘
In [ ]:
mod.train(max_epochs=30000, batch_size=None, train_size=1, use_gpu=True)
/home/server/miniconda3/envs/c2l/lib/python3.9/site-packages/scvi/train/_trainrunner.py:76: UserWarning: `use_gpu` is deprecated in v1.0 and will be removed in v1.1. Please use `accelerator` and `devices` instead.
  accelerator, lightning_devices, device = parse_device_args(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/server/miniconda3/envs/c2l/lib/python3.9/site-packages/lightning/pytorch/trainer/configuration_validator.py:69: You passed in a `val_dataloader` but have no `validation_step`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/server/miniconda3/envs/c2l/lib/python3.9/site-packages/lightning/pytorch/loops/fit_loop.py:281: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=10). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
Epoch 30000/30000: 100%|█| 30000/30000 [31:10<00:00, 16.24it/s, v_num=1, elbo_tr
`Trainer.fit` stopped: `max_epochs=30000` reached.
Epoch 30000/30000: 100%|█| 30000/30000 [31:10<00:00, 16.04it/s, v_num=1, elbo_tr
In [ ]:
# plot ELBO loss history during training, removing first 100 epochs from the plot
mod.plot_history(1000)
plt.legend(labels=["full data training"])
Out[ ]:
<matplotlib.legend.Legend at 0x7fefc4f56f40>
In [ ]:
# In this section, we export the estimated cell abundance (summary of the posterior distribution).
adata_vis = mod.export_posterior(adata_vis, sample_kwargs={'num_samples': 1000, 'batch_size': mod.adata.n_obs, 'use_gpu': True})

os.makedirs(os.path.join(out_path, st_sample, "cell2location_map"))

# Save anndata object with results
adata_vis.write(os.path.join(out_path, st_sample, "cell2location_map", "sp.h5ad"))
# adata_vis = sc.read_h5ad(os.path.join(out_path, st_sample, "cell2location_map", "sp.h5ad"))

# Save model
mod.save(os.path.join(out_path, st_sample, "cell2location_map"), overwrite=True)
# mod = cell2location.models.Cell2location.load(os.path.join(out_path, st_sample, "cell2location_map"), adata_vis)
/home/server/miniconda3/envs/c2l/lib/python3.9/site-packages/scvi/model/base/_pyromixin.py:388: UserWarning: `use_gpu` is deprecated in v1.0 and will be removed in v1.1. Please use `accelerator` and `devices` instead.
  _, _, device = parse_device_args(
Sampling local variables, batch: 100%|████████████| 1/1 [00:13<00:00, 13.53s/it]
Sampling global variables, sample: 100%|██████| 999/999 [00:13<00:00, 72.12it/s]
In [ ]:
mod.plot_QC()
In [ ]:
# add 5% quantile, representing confident cell abundance, 'at least this amount is present',
# to adata.obs with nice names for plotting
adata_vis.obs[adata_vis.uns["mod"]["factor_names"]] = adata_vis.obsm["q05_cell_abundance_w_sf"]

# select one slide
from cell2location.utils import select_slide
slide = select_slide(adata_vis, s=st_sample, batch_key="orig.ident")

# plot in spatial coordinates
with mpl.rc_context({"axes.facecolor": "black", "figure.figsize": [5, 5.5]}):
    # adata_sc.obs["split_reanno_ctp"].unique().tolist()
    sc.pl.spatial(slide, cmap="magma", color=adata_sc.obs["split_reanno_ctp"].unique().tolist(),
                  ncols=4, size=20, img_key="hires",
                  # limit color scale at 99.2% quantile of cell abundance
                  vmin=0, vmax="p99.2")
/home/server/miniconda3/envs/c2l/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1207: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  if not is_categorical_dtype(values):
In [ ]:
# Now we use cell2location plotter that allows showing multiple cell types in one panel
from cell2location.plt import plot_spatial

# select up to 6 clusters
clust_labels = ["Wnt2+ FB", "gCap", "Myofibroblast"]
clust_col = ["" + str(i) for i in clust_labels] # in case column names differ from labels

slide = select_slide(adata_vis, st_sample, batch_key="orig.ident")

with mpl.rc_context({"figure.figsize": (10, 10)}):
    fig = plot_spatial(
        adata=slide,
        # labels to show on a plot
        color=clust_col, labels=clust_labels,
        show_img=True,
        # 'fast' (white background) or 'dark_background'
        style="fast",
        # limit color scale at 99.2% quantile of cell abundance
        max_color_quantile=0.992,
        # size of locations (adjust depending on figure size)
        circle_diameter=6,
        colorbar_position="right"
    )
In [ ]:
adata_vis.obsm["q05_cell_abundance_w_sf"].to_csv(os.path.join(out_path, st_sample, "q05_cell_abundance_w_sf.csv"))
In [ ]: